Go back to the Preprocessing page. This link might be useful to keep track of the files created during the preprocessing.

Let us set some global options for all code chunks in this document.

knitr::opts_chunk$set(
  message = FALSE,    # Disable messages printed by R code chunks
  warning = FALSE,    # Disable warnings printed by R code chunks
  echo = TRUE,        # Show R code within code chunks in output
  include = TRUE,     # Include both R code and its results in output
  eval = TRUE,       # Evaluate R code chunks
  cache = FALSE,       # Enable caching of R code chunks for faster rendering
  fig.align = "center",
  out.width = "100%",
  retina = 2,
  error = TRUE,
  collapse = FALSE
)
rm(list = ls())
set.seed(1982)

1 Import libraries

# Install R-INLA package
# install.packages("INLA",repos = c(getOption("repos"),INLA ="https://inla.r-inla-download.org/R/testing"), dep = TRUE)
# Update R-INLA package
# inla.upgrade(testing = TRUE)
# Install inlabru package
# remotes::install_github("inlabru-org/inlabru", ref = "devel")
# Install rSPDE package
# remotes::install_github("davidbolin/rspde", ref = "devel")
# Install MetricGraph package
# remotes::install_github("davidbolin/metricgraph", ref = "devel")

library(INLA)
library(inlabru)
library(rSPDE)
library(MetricGraph)

library(plotly)
library(dplyr)
library(tidyr)
library(sf)
library(mapview)
library(listviewer)
library(jsonlite)
library(ggplot2)

library(here) # here() starts from the home directory
library(rmarkdown)

rm(list = ls()) # Clear the workspace
set.seed(1982) # Set seed for reproducibility

2 Load the data

# Load the data
load(here("data_files/day7142128hour13noconsecutivezeroes.RData")) # just to get the crs

# Load the network from tomtom
load(here("data_files/tomtom.RData")) 

2.1 Explore the data

tomtom |> head(5) |> paged_table()
tomtom |> dim()
## [1] 67199    39
ggplot(data = tomtom) +
  geom_sf(aes(color = SpeedLimit)) +
  scale_color_viridis_c(option = "D") +
  theme_minimal()

3 Filter and format the street network data

# Build polygon to cut the network and the data
polygon <- st_multipoint(c(st_point(c(-122.53000, 37.69702)),
                          st_point(c(-122.37000, 37.69702)),
                          st_point(c(-122.37000, 37.82600)),
                          st_point(c(-122.53000, 37.82600)))) %>%
  st_cast("POLYGON") %>%
  st_sfc(crs = st_crs(df)) # df dataset needs to be loaded to get the crs

# Filter and prepare the network data
from.tomtom <- tomtom %>%
  dplyr::select(-Id, -Segment.Id, -NewSegId, -timeSet, -dateRange, -standardDeviationSpeed, -travelTimeStandardDeviation) %>% # Remove unnecessary columns
  filter(FRC != "7") %>% # Remove tomtom class 5, 6 and 7 
  mutate(value = SpeedLimit, road_type = paste("class_", FRC, sep = ""), aux = paste("class_", FRC, sep = "")) %>% # Create road_type and aux variables
  pivot_wider(names_from = aux, values_from = value, values_fill = list(value = 0)) %>% # Use aux and value to create one-hot encoding for road_type
  mutate(upto1 = class_0 + class_1) %>% # Create upto1 variable
  mutate(upto3 = upto1 + class_3) %>% # Create upto3 variable
  mutate(upto4 = upto3 + class_4) %>% # Create upto4 variable
  mutate(upto5 = upto4 + class_5) %>% # Create upto5 variable
  mutate(upto6 = upto5 + class_6) %>% # Create upto6 variable
  mutate(Length  = Length/1000) %>%  # Transform Length from meters to kilometers
  mutate(density = sampleSize/Length) %>% # Create density variable (per day)
  mutate(density_per_hour = density/24) %>% # Create density_per_hour variable
  st_transform(crs = st_crs(df)) %>% # Transform to the same crs as the data
  st_filter(x = ., y = polygon, .predicate = st_within) # Filter by the polygon

road_types <- paste0("FRC", paste(sort(unique(from.tomtom$FRC)), collapse = ""))

# Get the weights and edges
weights <- from.tomtom %>% st_drop_geometry()
edges <- from.tomtom$geometry

3.1 Explore the data

Data frame from.tomtom is the last version of the data before building the graph. Below we explore its structure and show how it looks like.

from.tomtom |> head(5) |> paged_table()
from.tomtom |> dim()
## [1] 44319    46
ggplot(data = from.tomtom) +
  geom_sf(aes(color = SpeedLimit)) +
  scale_color_viridis_c(option = "D") +
  theme_minimal()

4 Build and save the graph

# Build the graph
graph <- graph_components$new(edges = edges, which_longlat = "sf", longlat = TRUE, edge_weights = weights)

# Get the largest connected component
sf_graph = graph$get_largest()

# Save the graph (notice that it has no data)
save(sf_graph, file = here("data_files/graph_construction_on_27JUN2024_FRC013456.RData"))

4.1 Explore the data

Below we show how the graph looks like.

sf_graph$plot(vertex_size = 0, 
              edge_width = 1, 
              edge_weight = "SpeedLimit", 
              edge_width_weight = "SpeedLimit", 
              edge_color = "SpeedLimit",
              scale_color_weights = ggplot2::scale_color_viridis_c(option = "D"))

graph_w <- sf_graph$get_edge_weights()
graph_w |> head(5) |> paged_table()
graph_w |> dim()
## [1] 44284    45